Libraries

# Attach all packages up front. library() (rather than require()) fails
# fast with an error if a package is missing; require() would only warn
# and return FALSE, letting the script continue and fail later.
library(lavaan)
library(psych)
library(dplyr)
library(tidyr)
library(semTools)
library(glue)
library(openxlsx)
library(ggplot2)
library(gridExtra)
library(kableExtra)
library(grid)
# install_github('ivanvoronin/mlth.data.frame')
# install_github('ivanvoronin/FAT')
library(mlth.data.frame)
library(FAT)
## Auxiliary functions
#  White background for combined plots
white_bg <- function(gg) {
  # Place an opaque white rectangle behind the supplied grob so that
  # combined plots are exported on a white (not transparent) background.
  background <- rectGrob(
    gp = gpar(
      fill = 'white',
      lwd = 0
    )
  )
  grobTree(background, gg)
}

#  Strip leading zero when abs(x) < 1
#  Format a correlation-like number: fixed decimals, leading zero
#  stripped when abs(x) < 1 (APA style), right-padded to `width`.
#
#  x      - numeric vector to format
#  digits - number of decimal places (default 2)
#  width  - minimum field width of the result (default digits + 2)
#
#  Returns a character vector the same length as x.
#
#  Implemented with base sprintf()/gsub() only; the previous version
#  called require(glue)/require(dplyr) inside the function, a
#  side-effecting anti-pattern (both packages are already attached at
#  the top of the script, so behavior is unchanged).
format_cor <- function(
  x, 
  digits = 2, 
  width = digits + 2
) {
  # e.g. digits = 2 -> '%0.2f'
  decimal_fmt <- sprintf('%%0.%df', digits)
  out <- sprintf(decimal_fmt, x)
  # Strip the leading zero, keeping any minus sign: '0.50' -> '.50'
  out <- gsub('(^-?)0\\.', '\\1\\.', out)
  # Right-align in a field of `width` characters
  sprintf(sprintf('%%%ds', width), out)
}

#  Extract a legend from the plot
#  From https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
#  Extract the legend grob ("guide-box") from a ggplot object.
#  From https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
g_legend <- function(a.gplot){
  built <- ggplot_gtable(ggplot_build(a.gplot))
  grob_names <- vapply(built$grobs, function(g) g$name, character(1))
  built$grobs[[which(grob_names == "guide-box")]]
}

# Format p (three digits, no leading zero, < 0.001)
# Format p (three digits, no leading zero, '< .001' below the threshold).
# Vectorized; returns a character vector the same length as p.
format_p <- function(p) {
  formatted <- sprintf("%.3f", p)
  formatted <- sub("^(-?)0.", "\\1.", formatted)
  ifelse(p < 0.001, '< .001', formatted)
}

# Add html-indentation to a table cell via inline CSS left padding
indent <- function(x) {
  cell_spec(x, extra_css = 'padding-left: 1.3em;')
}

Read the data

# The script assumes no missing data
# Build the analysis dataset from the pooled raw file:
#  - recode Gender / Country / Method from integer codes to labelled factors
#  - map numeric Source codes onto country-specific cohort labels
#  - score the Total scale and the Learning / Testing subscales
#  - keep only rows where every AMAS item response is in the valid 1-5 range
data <- read.csv('data/All.csv') %>%
  select(
    Age,
    Gender, 
    Country, 
    Source,
    Method,
    AMAS1:AMAS9
  ) %>%
  mutate(
    Gender = factor(
      Gender,
      levels = 1:2,
      labels = c(
        'Male', 
        'Female'
      )
    ),
    Country = factor(
      Country,
      levels = 1:3,
      labels = c(
        'UK', 
        'Russia', 
        'China'
      )
    ),
    Method = factor(
      Method,
      levels = 1:2,
      labels = c('Online', 'Paper')
    ),
    # Source is a numeric code; switch() picks the n-th alternative, so
    # e.g. UK codes 3-5 all collapse into 'Cohort 3' and codes 6-8 are
    # unused (NA) for the UK. Codes with no label for a country become NA.
    Source = case_when(
      Country == 'UK' ~
        sapply(
          Source,
          switch,
          'Cohort 1',
          'Cohort 2',
          'Cohort 3',
          'Cohort 3',
          'Cohort 3',
          NA, NA, NA,
          'Cohort 4',
          'Cohort 6', 
          'Cohort 5'
        ),
      Country == 'Russia' ~ 'Cohort 1',
      Country == 'China' ~
        sapply(
          Source,
          switch,
          NA, NA, NA,
          NA, NA, NA,
          'Cohort 1',
          'Cohort 2',
          NA, NA, NA
        )
    ),
    # Scale scores: Total = all nine items;
    # Learning = items 1, 3, 6, 7, 9; Testing = items 2, 4, 8, 5
    Total = rowSums(
      .[glue('AMAS{1:9}')]
    ),
    Learning = rowSums(
      .[glue('AMAS{c(1, 3, 6, 7, 9)}')]
    ),
    Testing = rowSums(
      .[glue('AMAS{c(2, 4, 8, 5)}')]
    )
  ) %>%
  # Drop rows with any out-of-range item response.
  # NOTE(review): filter_at() is superseded in current dplyr;
  # filter(if_all(...)) is the modern equivalent.
  filter_at(
    vars(!!glue('AMAS{1:9}')), 
    all_vars(. %in% 1:5)
  )

# Item metadata for the nine AMAS items: variable name, printed item
# number, subscale membership, and item wording (Testing items first,
# matching the order used in the tables below).
AMAS_items <- data.frame(
  item = c('AMAS2', 'AMAS4', 'AMAS8', 'AMAS5',
           'AMAS1', 'AMAS3', 'AMAS6', 'AMAS7', 'AMAS9'),
  item_lab = c('2', '4', '8', '5', '1', '3', '6', '7', '9'),
  scale = c(rep('Testing', 4), rep('Learning', 5)),
  item_content = c(
    'Thinking about upcoming maths test',
    'Taking examination in maths course',
    'Being given an "pop" quiz in maths class',
    'Being given a homework due the next class',
    'Having to use tables',
    'Watching teacher work equation',
    'Listening to lecture in maths class',
    'Listening to another student explain a formula',
    'Starting a new chapter in maths book'
  ),
  stringsAsFactors = FALSE
)

Method

Samples and procedures

# Summarise a (sub)sample: N, age mean/SD/range and collection-method
# counts, for the full group and for each gender separately.
# Returns a data frame with rows Total / Female / Male and a leading
# ' ' column holding those row labels.
data_sum_table <- function(d) {
  subsets <- list(
    Total = d,
    Female = filter(d, Gender == 'Female'),
    Male = filter(d, Gender == 'Male')
  )
  summaries <- lapply(subsets, function(x) {
    data.frame(
      N = nrow(x),
      M_age = mean(x$Age, na.rm = TRUE),
      SD_age = sd(x$Age, na.rm = TRUE),
      Range_age = glue(
        '{mi} - {ma}',
        mi = round(min(x$Age, na.rm = TRUE)),
        ma = round(max(x$Age, na.rm = TRUE))
      ),
      Online = sum(x$Method == 'Online', na.rm = TRUE),
      Paper = sum(x$Method == 'Paper', na.rm = TRUE)
    )
  })
  combined <- do.call('rbind', summaries)
  # Turn the row labels into a leading column named ' '
  combined <- data.frame(
    ' ' = row.names(combined),
    combined,
    check.names = FALSE
  )
  row.names(combined) <- NULL
  combined
}
# SOM 1 - Table 1. Sample age characteristics and collection methods
# Country-level summaries followed by per-cohort breakdowns for the UK
# and China (Russia has a single cohort).
c(
  UK = list(
    data %>% filter(Country == 'UK')
  ),
  data %>% filter(Country == 'UK') %>% split(.$Source),
  Russia = list(
    data %>% filter(Country == 'Russia')
  ),
  China = list(
    data %>% filter(Country == 'China')
  ),
  data %>% filter(Country == 'China') %>% split(.$Source)
) %>%
  lapply(data_sum_table) %>%
  bind_rows(.id = '  ') %>%
  mutate(
    # Print each group label only on its first row: compare every label
    # with the previous row's label and blank out repeats
    '  ' = case_when(
      c(0, `  `[-length(`  `)]) != `  ` ~ `  `,
      TRUE ~ ' '
    )
  ) %>%
  kable(
    caption = 'SOM 1 - Table 1. Sample age characteristics and collection methods',
    align = 'c',
    digits = 2,
    col.names = c(
      ' ', ' ',
      'N', 'M<sub>age</sub>',
      'SD<sub>age</sub>', 'Range<sub>age</sub>',
      'Online', 'Paper'
    ),
    escape = FALSE
  ) %>%
  kable_styling %>%
  add_header_above(
    c(
      rep(' ', 6),
      'Method Ns' = 2
    )
  ) %>%
  # Rows 1, 22 and 25 are the UK, Russia and China 'Total' rows
  # (hard-coded: recheck these indices if cohorts are added or removed)
  row_spec(
    c(1, 22, 25),
    bold = TRUE
  ) %>%
  footnote(
    'N = number of participants; M = mean; SD = standard deviation. All estimates rounded to two decimal points'
  )
SOM 1 - Table 1. Sample age characteristics and collection methods
Method Ns
N Mage SDage Rangeage Online Paper
UK Total 1043 21.04 2.17 18 - 30 485 558
Female 712 20.90 2.15 18 - 30 309 403
Male 331 21.35 2.16 18 - 30 176 155
Cohort 1 Total 318 21.41 1.84 18 - 30 256 62
Female 235 21.27 1.81 18 - 29 185 50
Male 83 21.81 1.86 18 - 30 71 12
Cohort 2 Total 128 19.67 1.48 18 - 28 0 128
Female 98 19.61 1.48 18 - 28 0 98
Male 30 19.87 1.50 18 - 22 0 30
Cohort 3 Total 199 20.37 2.40 18 - 29 0 199
Female 139 20.21 2.33 18 - 29 0 139
Male 60 20.73 2.52 18 - 29 0 60
Cohort 4 Total 37 20.59 1.34 19 - 26 0 37
Female 22 20.82 1.65 19 - 26 0 22
Male 15 20.27 0.59 19 - 21 0 15
Cohort 5 Total 269 21.22 1.89 18 - 30 182 87
Female 166 21.06 1.99 18 - 30 101 65
Male 103 21.47 1.71 18 - 26 81 22
Cohort 6 Total 92 22.82 2.77 18 - 30 47 45
Female 52 23.01 2.66 19 - 30 23 29
Male 40 22.57 2.92 18 - 29 24 16
Russia Total 842 19.67 1.78 18 - 30 842 0
Female 528 19.50 1.54 18 - 29 528 0
Male 314 19.96 2.10 18 - 30 314 0
China Total 917 20.61 2.04 18 - 30 221 696
Female 506 20.58 2.12 18 - 30 127 379
Male 411 20.64 1.94 18 - 29 94 317
Cohort 1 Total 785 20.63 2.06 18 - 30 89 696
Female 442 20.64 2.14 18 - 30 63 379
Male 343 20.61 1.95 18 - 29 26 317
Cohort 2 Total 132 20.49 1.96 18 - 30 132 0
Female 64 20.17 1.96 18 - 30 64 0
Male 68 20.78 1.92 18 - 27 68 0
Note:
N = number of participants; M = mean; SD = standard deviation. All estimates rounded to two decimal points

Results

Item distributions and descriptive statistics

# Figure 1. Item score distributions in all countries

# Left and right margins as % of plot width — extra horizontal space
# for the item-number / item-content annotations drawn outside the panel
ml <- 0.45
mr <- 0.9

item_plots <- list()  # one stacked-bar panel per country

# One horizontal stacked-bar chart of item response distributions per
# country; item number, scale and item wording are drawn as text in the
# plot margins (clipping is disabled in coord_flip at the end).
for (i in c('UK', 'Russia', 'China')) {
  item_plots[[i]] <- data %>%
    filter(Country == i) %>%
    select(!!glue('AMAS{1:9}')) %>%
    # Long format: one row per item x response category with its count
    gather() %>%
    group_by(value, key) %>%
    count() %>%
    ungroup() %>%
    full_join(
      AMAS_items, 
      by = c('key' = 'item')
    ) %>%
    # Keep the text labels only on the rows for response category 1 so
    # each label is plotted exactly once per item bar
    mutate(
      item_content = case_when(
        value == 1 ~ item_content,
        TRUE ~ NA_character_
      ),
      scale = case_when(
        value == 1 ~ scale,
        TRUE ~ NA_character_
      ),
      item_lab = case_when(
        value == 1 ~ item_lab,
        TRUE ~ NA_character_
      )
    ) %>%
    mutate(
      # Reversed level orders so the items appear in AMAS_items order
      # (Testing block on top) after coord_flip
      value = factor(
        value, 
        levels = 5:1
      ),
      key = factor(
        key, 
        levels = rev(AMAS_items$item)
      )
    ) %>%
    ggplot(
      aes(
        x = key, 
        y = n,
        fill = value
      )
    ) +
    # position = 'fill' rescales counts to proportions within each item
    geom_bar(
      position = 'fill',
      stat = 'identity'
    ) +
    scale_fill_discrete(
      limits = factor(1:5),
      labels = c(
        '1 - Low anxiety',
        '2 - Some anxiety',
        '3 - Moderate anxiety',
        '4 - Quite a bit of anxiety',
        '5 - High anxiety'
      ),
      type = c(
        '#d0d1e6',
        '#a6bddb',
        '#74a9cf',
        '#2b8cbe',
        '#045a8d'
      )
    ) +
    # Expand the scale by ml/mr to leave room for the margin annotations
    scale_y_continuous(
      expand = expansion(
        add = c(ml, mr)
      ),
      breaks = seq(0, 1, 0.25),
      labels = seq(0, 100, 25)
    ) +
    # Item wording to the right of each bar, with a bold header above
    geom_text(
      aes(label = item_content),
      y = 1.03,
      hjust = 0,
      na.rm = TRUE
    ) +
    annotation_custom(
      grob = textGrob(
        x = 0,
        y = 0,
        hjust = 0,
        vjust = 0,
        label = 'Item content',
        gp = gpar(
          fontface = 'bold'
        ) 
      ),
      xmin = 10,
      xmax = 11,
      ymin = 1.03,
      ymax = 1.1
    ) +
    # Scale name to the left of each bar, with a bold header above
    geom_text(
      aes(label = scale),
      y = -0.4,
      hjust = 0,
      na.rm = TRUE
    ) +
    annotation_custom(
      grob = textGrob(
        x = 0,
        y = 0,
        hjust = 0,
        vjust = 0,
        label = 'Scale',
        gp = gpar(
          fontface = 'bold'
        )
      ),
      xmin = 10,
      xmax = 11,
      ymin = -0.4,
      ymax = -0.3
    ) +
    # Item number immediately left of each bar
    geom_text(
      aes(label = item_lab),
      y = -0.05,
      hjust = 1,
      na.rm = TRUE
    ) +
    # Country name centred above the panel
    annotation_custom(
      grob = textGrob(
        x = 0.5,
        y = 0.5,
        hjust = 0.5,
        vjust = 0.5,
        label = i,
        gp = gpar(
          fontsize = 20
        )
      ),
      xmin = 10,
      xmax = 11,
      ymin = 0.25,
      ymax = 0.75
    ) +
    annotation_custom(
      grob = textGrob(
        x = 0,
        y = 0,
        hjust = 1,
        vjust = 0,
        label = 'Item №',
        gp = gpar(fontface = 'bold') 
      ),
      xmin = 10,
      xmax = 11,
      ymin = -0.05,
      ymax = -0.05
    ) +
    # Hand-drawn axis lines (the default axes are blanked in theme())
    geom_segment(
      aes(
        x = 0, 
        y = 0,
        xend = 0, 
        yend = 1
      ),
      size = 0.5,
      color = '#00537d',
      inherit.aes = FALSE
    ) +
    geom_hline(
      yintercept = c(0, 1),
      size = 0.5,
      color = '#00537d',
      linetype = 'solid'
    ) +
    labs(
      x = '', y = '%', 
      fill = ''
    ) +
    theme_classic() +
    theme(
      legend.position = 'none',
      legend.direction = 'vertical',
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.text.y = element_blank(),
      axis.line.x = element_blank(),
      plot.title = element_text(
        size = 18,
        face = 'italic',
        hjust = (ml + 0.5) / (ml + mr + 1),
        vjust = 1
      ),
      axis.title.x = element_text(
        hjust = (ml + 0.5) / (ml + mr + 1)
      ),
      panel.grid.major.x = element_line(
        size = 0.5,
        color = '#00537d'
      ),
      plot.margin = margin(t = 50, b = 10)
    ) +
    # Horizontal bars; clip = 'off' lets the margin text render
    coord_flip(clip = 'off')
}

# Combine the three country panels on a white background and attach a
# single shared legend, extracted from the China panel with its legend
# display switched back on.
grid.draw(
  white_bg(
    arrangeGrob(
      grobs = item_plots,
      bottom = g_legend(
        item_plots$China +
          theme(
            legend.position = 'bottom',
            legend.direction = 'horizontal'
          )
      )
    )
  )
)
Figure 1. Item score distributions in all countries
Figure 1. Item score distributions in all countries
# SOM 2 - Table 1.
# Per-country response counts and percentages for each item.
# NOTE(review): sapply(table) yields a 9 x 5 matrix only if every item
# uses all five response categories in every country — holds for these
# data (see the rendered table), but verify if the data change.
count_tables <- by(data, data$Country,
   function(x) {
     counts <- x %>%
       select(!!AMAS_items$item) %>%
       sapply(table) %>%
       t
     
     # Row-wise percentages (each row = one item)
     perc <- counts / rowSums(counts)
     
     # 'count (percent)' cells; sprintf flattens, so restore the shape
     outp <- sprintf(
       '%0.0f (%0.1f)', 
       counts, 
       perc * 100
     )
     
     dim(outp) <- dim(counts)
     
     # Prepend scale / item-number columns and sort Learning before Testing
     outp <- cbind(
       AMAS_items[, c('scale', 'item_lab')], 
       outp
     ) %>%
       as.data.frame %>%
       arrange(scale)
     
     colnames(outp) <- c(
       'Scale', 'Item No', 
       '1', '2', '3', '4', '5'
     )
     
     outp
   }) 

# Render the per-country count tables, indenting the 'Scale' header
kable_styling(
  kable2(
    lapply(
      count_tables,
      function(tab) {
        names(tab)[1] <- indent('Scale')
        tab
      }
    ),
    align_first = 'l',
    align = 'c',
    caption = 'SOM 2 - Table 1. Response count and percent (in brackets) by country',
    escape = FALSE
  )
)
SOM 2 - Table 1. Response count and percent (in brackets) by country
Scale Item No 1 2 3 4 5
UK
Learning 1 636 (61.0) 238 (22.8) 106 (10.2) 54 (5.2) 9 (0.9)
Learning 3 544 (52.2) 244 (23.4) 159 (15.2) 58 (5.6) 38 (3.6)
Learning 6 527 (50.5) 279 (26.7) 139 (13.3) 62 (5.9) 36 (3.5)
Learning 7 473 (45.3) 303 (29.1) 159 (15.2) 79 (7.6) 29 (2.8)
Learning 9 564 (54.1) 281 (26.9) 119 (11.4) 60 (5.8) 19 (1.8)
Testing 2 112 (10.7) 251 (24.1) 259 (24.8) 284 (27.2) 137 (13.1)
Testing 4 88 (8.4) 186 (17.8) 271 (26.0) 298 (28.6) 200 (19.2)
Testing 8 191 (18.3) 262 (25.1) 249 (23.9) 193 (18.5) 148 (14.2)
Testing 5 161 (15.4) 280 (26.8) 268 (25.7) 219 (21.0) 115 (11.0)
Russia
Learning 1 695 (82.5) 97 (11.5) 30 (3.6) 14 (1.7) 6 (0.7)
Learning 3 560 (66.5) 163 (19.4) 76 (9.0) 35 (4.2) 8 (1.0)
Learning 6 576 (68.4) 141 (16.7) 88 (10.5) 30 (3.6) 7 (0.8)
Learning 7 525 (62.4) 194 (23.0) 82 (9.7) 37 (4.4) 4 (0.5)
Learning 9 532 (63.2) 186 (22.1) 81 (9.6) 32 (3.8) 11 (1.3)
Testing 2 149 (17.7) 258 (30.6) 212 (25.2) 172 (20.4) 51 (6.1)
Testing 4 51 (6.1) 122 (14.5) 179 (21.3) 269 (31.9) 221 (26.2)
Testing 8 69 (8.2) 175 (20.8) 207 (24.6) 214 (25.4) 177 (21.0)
Testing 5 103 (12.2) 167 (19.8) 215 (25.5) 236 (28.0) 121 (14.4)
China
Learning 1 364 (39.7) 312 (34.0) 174 (19.0) 53 (5.8) 14 (1.5)
Learning 3 334 (36.4) 296 (32.3) 182 (19.8) 83 (9.1) 22 (2.4)
Learning 6 329 (35.9) 332 (36.2) 151 (16.5) 66 (7.2) 39 (4.3)
Learning 7 352 (38.4) 315 (34.4) 175 (19.1) 53 (5.8) 22 (2.4)
Learning 9 265 (28.9) 287 (31.3) 202 (22.0) 101 (11.0) 62 (6.8)
Testing 2 80 (8.7) 171 (18.6) 283 (30.9) 255 (27.8) 128 (14.0)
Testing 4 79 (8.6) 178 (19.4) 257 (28.0) 263 (28.7) 140 (15.3)
Testing 8 76 (8.3) 108 (11.8) 200 (21.8) 250 (27.3) 283 (30.9)
Testing 5 95 (10.4) 176 (19.2) 251 (27.4) 257 (28.0) 138 (15.0)
# Figure 2. Violin plots
boxplot_width <- 0.10  # width of the inner boxplot layer
violin_width <- 0.75   # width of the violin outline

violin_plots <- list()  # one panel per score (Total / Learning / Testing)

# One violin-plus-boxplot panel per score, split by country
for (i in c('Total', 'Learning', 'Testing')) {
  violin_plots[[i]] <- data %>%
    select(
      Country, 
      var = !!i
    ) %>%
    ggplot(
      aes(
        y = var, 
        x = Country
      )
    ) +
    geom_violin(
      trim = TRUE,
      scale = 'area',
      width = violin_width,
      alpha = 0.15,
      fill = '#A7ACA2'
    ) +
    # Narrow boxplot overlaid on the violin
    geom_boxplot(
      width = boxplot_width,
      alpha = 0.7,
      show.legend = FALSE
    ) +
    labs(title = i) +
    ylab('Score') +
    theme_classic() +
    theme(
      axis.title.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.x = element_text(size = 16),
      axis.text.y = element_text(size = 10),
      axis.title.y = element_text(size = 14),
      plot.title = element_text(
        size = 18,
        vjust = 0.5,
        margin = margin(
          b = 10,
          unit = 'pt'
        )
      ),
      legend.position = 'none'
    ) + 
    guides(
      fill = guide_legend(
        title.position = 'left',
        title.hjust = 0,
        title.vjust = 0.5,
        label.position = 'left',
        label.hjust = 0,
        label.vjust = 0.5,
        keywidth = unit(0.45, 'in')
      )
    )
}

# Stack the three violin panels vertically on a white background
grid.draw(
  white_bg(
    arrangeGrob(
      grobs = violin_plots,
      nrow = 3
    )
  )
)
Figure 2. Violin plots
Figure 2. Violin plots
# Function to make the table with descriptive statistics:
# one row per score (Total / Learning / Testing) with mean, SD, median,
# mode, variance, skewness, kurtosis and the observed range.
compute_ds <- function(data) {
  # No missing data assumed: summaries are computed without na.rm
  scores <- data[c('Total', 'Learning', 'Testing')]
  per_scale <- lapply(scores, function(v) {
    data.frame(
      M = mean(v),
      SD = sd(v),
      Median = median(v),
      Mode = names(which.max(table(v))),
      'Variance' = var(v),
      Skewness = psych::skew(v),
      Kurtosis = psych::kurtosi(v),
      'Actual range' = glue('{min(v)}-{max(v)}'),
      check.names = FALSE
    )
  })
  do.call('rbind', per_scale)
}
# Table 1. Descriptive statistics 
# Per-country descriptives plus a pooled 'All' panel; panel titles
# carry the sample sizes.
descr_tab <- by(data, data$Country, compute_ds)

descr_tab$All <- compute_ds(data)

# Panel titles: '<name>, N = <n>'
names(descr_tab) <- glue(
  '{nm}, N = {N}',
  nm = names(descr_tab),
  N = c(table(data$Country), nrow(data))
)

# Drop the 'by' class so kable2 treats descr_tab as a plain list
class(descr_tab) <- 'list'

descr_tab %>%
  kable2(
    col.names = c(
      indent('Subscale'),
      'M', 'SD', 'Median', 'Mode', 'Variance', 'Skewness', 'Kurtosis', 'Actual range'
    ),
    align_first = 'l',
    align = 'c',
    digits = 2,
    caption = 'Table 1. Descriptive statistics',
    escape = FALSE
  ) %>%
  kable_styling %>%
  footnote(
    'N = number of participants; M = mean; SD = standard deviation.'
  )
Table 1. Descriptive statistics
Subscale M SD Median Mode Variance Skewness Kurtosis Actual range
UK, N = 1043
Total 21.11 7.48 20 16 55.96 0.66 0.04 9-45
Learning 9.00 4.12 8 5 16.99 1.26 1.28 5-25
Testing 12.11 4.11 12 12 16.92 0.01 -0.81 4-20
Russia, N = 842
Total 20.14 6.42 19 18 41.26 0.61 0.23 9-45
Learning 7.47 3.33 6 5 11.08 1.78 3.38 5-25
Testing 12.67 4.14 13 11 17.12 -0.16 -0.85 4-20
China, N = 917
Total 23.68 7.17 24 23 51.40 0.07 -0.32 9-45
Learning 10.47 3.92 10 5 15.39 0.60 -0.03 5-25
Testing 13.21 4.10 14 15 16.79 -0.38 -0.48 4-20
All, N = 2802
Total 21.66 7.22 21 18 52.15 0.46 -0.18 9-45
Learning 9.02 4.01 8 5 16.09 1.09 0.80 5-25
Testing 12.64 4.14 13 12 17.14 -0.16 -0.77 4-20
Note:
N = number of participants; M = mean; SD = standard deviation.

Polychoric correlations

# Figure 3. Polychoric correlations between AMAS items in all countries
# Lower-triangle heatmap of polychoric correlations per country, with
# item numbers and scale names annotated along the diagonal / margins.
cor_plots <- list()
for (i in c('UK', 'Russia', 'China')) {
  cor_plots[[i]] <- data %>%
    filter(Country == i) %>%
    select(starts_with('AMAS')) %>%
    polychoric(correct = 0) %>%
    { .$rho } %>%
    # Correlation matrix -> long format (var1, var2, corr)
    data.frame(
      var1 = rownames(.), 
      .
    ) %>%
    gather(
      key = 'var2', 
      value = 'corr', 
      -var1
    ) %>%
    mutate(
      # Order items so each subscale forms a contiguous block
      # (Testing items first on x, reversed order on y)
      var1 = gsub('AMAS', '', var1),
      var1 = factor(
        var1,
        levels = c(
          '2', '4', '8', '5',
          '1', '3', '6', '7', '9'
        )
      ),
      var2 = gsub('AMAS', '', var2),
      var2 = factor(
        var2,
        levels = c(
          '9', '7', '6', '3', 
          '1', '5', '8', '4', '2'
        )
      ),
      # Scale / item-number labels are kept only on the diagonal cells
      scale = setNames(
        AMAS_items$scale, 
        AMAS_items$item_lab
      )[as.character(var2)],
      scale = case_when(
        var1 == var2 ~ scale,
        TRUE ~ NA_character_
      ),
      item = case_when(
        var1 == var2 ~ as.character(var2),
        TRUE ~ NA_character_
      ),
      # Correlation labels without leading zero; hide the unit diagonal
      label = format_cor(corr, width = 3),
      label = case_when(
        label == '1.00' ~ '',
        TRUE ~ label
      ),
      corr = case_when(
        var1 == var2 ~ NA_real_,
        TRUE ~ corr
      )
    ) %>%
    # Keep the lower triangle (plus diagonal) only
    filter(
      as.numeric(var1) <= 10 - as.numeric(var2)
    ) %>%
    ggplot(
      aes(
        var1, var2, 
        fill = corr, 
        label = label
      )
    ) +
    geom_tile(
      color = 'white'
    ) +
    geom_text(
      alpha = 0.8, 
      size = 5.5
    ) +
    scale_fill_gradient2(
      low = 'white',
      high = '#d2645a',
      limits = c(0, 1),
      name = 'Polychoric correlation',
      na.value = NA
    ) +
    # Extra room on the left for the item-number / scale annotations
    scale_x_discrete(
      expand = expansion(
        add = c(2.5, 0.6)
      )
    ) +
    geom_text(
      aes(label = item),
      x = 0.3,
      hjust = 1,
      na.rm = TRUE,
      size = 4.7
    ) +
    annotation_custom(
      grob = textGrob(
        x = 1,
        y = 0,
        hjust = 1,
        vjust = 0,
        label = 'Item №',
        gp = gpar(
          fontface = 'bold',
          fontsize = 13
        ) 
      ),
      xmin = 0,
      xmax = 0.3,
      ymin = 10,
      ymax = 11
    ) +
    geom_text(
      aes(label = scale),
      x = -1.4,
      hjust = 0,
      na.rm = TRUE,
      size = 4.5
    ) +
    annotation_custom(
      grob = textGrob(
        x = 0,
        y = 0,
        hjust = 0,
        vjust = 0,
        label = 'Scale',
        gp = gpar(
          fontface = 'bold',
          fontsize = 13
        )
      ),
      xmin = -1.4,
      xmax = -0.4,
      ymin = 10,
      ymax = 11
    ) +
    # Country name placed in the empty upper-right corner
    annotation_custom(
      grob = textGrob(
        x = 0.5,
        y = 0.5,
        hjust = 0.5,
        vjust = 0.5,
        label = i,
        gp = gpar(
          fontsize = 20
        )
      ),
      xmin = 7.5,
      xmax = 8.5,
      ymin = 6.5,
      ymax = 7.5
    ) +
    labs(
      x = '', y = ''
    ) +
    theme_minimal() +
    theme(
      legend.position = 'none',
      axis.text = element_text(
        size = 13,
        color = 'black'
      ),
      axis.text.y = element_blank(),
      plot.margin = margin(t = 30),
      panel.grid = element_blank()
    ) +
    # clip = 'off' so the margin annotations are rendered
    coord_cartesian(
      clip = 'off'
    )
}

# Stack the three correlation panels with one shared colour-bar legend,
# extracted from the China panel with its legend switched back on.
grid.draw(
  white_bg(
    arrangeGrob(
      grobs = cor_plots,
      nrow = 3,
      bottom = g_legend(
        cor_plots$China +
          theme(
            legend.position = 'bottom',
            legend.direction = 'horizontal',
            legend.key.width = unit(55, 'points')
          ) +
          guides(
            fill = guide_colorbar(
              title.position = 'left',
              title.vjust = 0.8,
              label.position = 'bottom'
            )
          )
      )
    )
  )
)
Figure 3. Polychoric correlations between AMAS items in all countries
Figure 3. Polychoric correlations between AMAS items in all countries

Reliability measures

# Running the models because they are used for omega reliability

# Collapse categories 4 - 'Quite a bit of anxiety' and 5 - 'High anxiety'
# for the items of the Learning scale. pmin(., 4L) maps responses of 5
# to 4 and leaves 1-4 unchanged; responses are guaranteed to be integers
# in 1:5 by the filter applied when the data were read, so this is
# equivalent to the per-item case_when recodes it replaces.
data2 <- data %>%
  mutate_at(
    vars(AMAS1, AMAS3, AMAS6, AMAS7, AMAS9),
    ~ pmin(., 4L)
  )

# Running the model for each country
# Bifactor specification: a general Total factor plus Learning and
# Testing group factors; factors are standardized (std.lv = TRUE) and
# kept orthogonal (orthogonal = TRUE) in the cfa() calls below.
AMAS_model <- '
  Total =~ AMAS1 + AMAS2 + AMAS3 + AMAS4 + AMAS5 +
           AMAS6 + AMAS7 + AMAS8 + AMAS9
  Learning =~ AMAS1 + AMAS3 + AMAS6 + AMAS7 + AMAS9
  Testing  =~ AMAS2 + AMAS4 + AMAS5 + AMAS8
'

model_list <- list()

# Print warnings as they occur (session-wide side effect)
options(warn = 1)

# Fit the bifactor model separately per country on the collapsed
# (data2) responses, items treated as ordered categorical (WLSMV).
# Gender is carried along in D but not used in AMAS_model itself.
for (i in c('UK', 'Russia', 'China')) {
  D <- data2 %>%
    filter(Country == i) %>%
    select(
      Gender, 
      starts_with('AMAS')
    ) %>%
    mutate_at(
      vars(starts_with('AMAS')), 
      ~ ordered(.)
    )
  
  model_list[[i]] <- cfa(
    AMAS_model,
    data = D,
    ordered = TRUE,
    std.lv = TRUE,
    orthogonal = TRUE,
    estimator = "WLSMV"
  )
}

# Running the models of measurement invariance
# Three nested multi-group models across countries: configural (all
# parameters free), metric (equal loadings) and scalar (equal loadings
# and thresholds).
config_model <- cfa(
  AMAS_model,
  data = select(data2, Country, starts_with('AMAS')),
  ordered = TRUE,
  orthogonal = TRUE,
  estimator = 'WLSMV',
  group = 'Country',
  std.lv = TRUE
)

metric_model <- cfa(
  AMAS_model,
  data = select(data2, Country, starts_with('AMAS')),
  ordered = TRUE,
  orthogonal = TRUE,
  estimator = 'WLSMV',
  std.lv = TRUE,
  group = 'Country',
  group.equal = 'loadings'
)

scalar_model <- cfa(
  AMAS_model,
  data = select(data2, Country, starts_with('AMAS')),
  ordered = TRUE,
  orthogonal = TRUE,
  estimator = 'WLSMV',
  group = 'Country',
  group.equal = c(
    'loadings',
    'thresholds'
  ),
  std.lv = TRUE
)
# Table 2. Ordinal Alpha and Omega measures
# Omega reliability (total / general / group) per country, derived from
# the fitted bifactor models.
omega_tab <- model_list %>%
  lapply(
    function(x) {
      om <- omegaFromSem(x, plot = FALSE)
      om_t <- om$omega.group
      
      colnames(om_t) <- c('O_tot', 'O_gen', 'O_gr')
      # One list element per row (Total / Learning / Testing scales),
      # then a multi-header frame with one column group per scale
      om_t <- split(om_t, row(om_t))
      om_t <- do.call('mlth.data.frame', om_t)
      names(om_t) <- c('Total', 'Learning', 'Testing')
      return(om_t)
    }
  ) %>%
  do.call('rbind', .) %>%
  `row.names<-`(c('UK', 'Russia', 'China'))
## Warning in split.default(x = seq_len(nrow(x)), f = f, drop = drop, ...): data
## length is not a multiple of split variable

## Warning in split.default(x = seq_len(nrow(x)), f = f, drop = drop, ...): data
## length is not a multiple of split variable

## Warning in split.default(x = seq_len(nrow(x)), f = f, drop = drop, ...): data
## length is not a multiple of split variable

# Polychoric correlation matrices per country, computed on the
# collapsed (data2) item responses.
polychor_mat <- list(
  UK = data2 %>%
    filter(Country == 'UK') %>%
    select(starts_with('AMAS')),
  Russia = data2 %>%
    filter(Country == 'Russia') %>%
    select(starts_with('AMAS')),
  China = data2 %>%
    filter(Country == 'China') %>%
    select(starts_with('AMAS'))
) %>%
  lapply(function(d) polychoric(d, correct = 0, global = FALSE)$rho)
## Warning in FUN(X[[i]], ...): The items do not have an equal number of response
## alternatives, global set to FALSE.
## Warning in FUN(X[[i]], ...): The items do not have an equal number of response
## alternatives, global set to FALSE.

## Warning in FUN(X[[i]], ...): The items do not have an equal number of response
## alternatives, global set to FALSE.

# Ordinal alpha per scale, computed from the polychoric matrices
alpha_tab <- polychor_mat %>%
  lapply(
    function(m) {
      learning_items <- c('AMAS1', 'AMAS3', 'AMAS6', 'AMAS7', 'AMAS9')
      testing_items <- c('AMAS2', 'AMAS4', 'AMAS5', 'AMAS8')
      data.frame(
        Total = psych::alpha(m)$total[, 'raw_alpha'],
        Learning = psych::alpha(
          m[learning_items, learning_items]
        )$total[, 'raw_alpha'],
        Testing = psych::alpha(
          m[testing_items, testing_items]
        )$total[, 'raw_alpha']
      )
    }
  ) %>%
  do.call('rbind', .) %>%
  as.data.frame

# Pair each country's alpha column with its omega columns, relabel the
# measures, and render Table 2 with one column group per scale.
Map(
  function(al, om) {
    data.frame(
      alpha = al,
      om
    )
  },
  alpha_tab,
  omega_tab
) %>%
  lapply(
    function(x) {
      colnames(x) <- c(
        'α',
        'ω Total',
        'ω General',
        'ω Group'
      )
      x
    }
  ) %>% 
  do.call('mlth.data.frame', .) %>%
  `row.names<-`(c('UK', 'Russia', 'China')) %>%
  kable2(
    align_first = 'l',
    align = 'c',
    digits = 2,
    caption = 'Table 2. Ordinal Alpha and Omega measures'
  ) %>%
  kable_styling %>%
  footnote(
    "α = ordinal Alpha; ω = Omega. All estimates rounded to two decimal places."
  )
Table 2. Ordinal Alpha and Omega measures
Total
Learning
Testing
α ω Total ω General ω Group α ω Total ω General ω Group α ω Total ω General ω Group
UK 0.92 0.94 0.86 0.08 0.89 0.90 0.87 0.03 0.87 0.89 0.59 0.30
Russia 0.91 0.95 0.73 0.22 0.90 0.91 0.52 0.39 0.90 0.92 0.66 0.26
China 0.90 0.93 0.79 0.14 0.83 0.85 0.67 0.18 0.90 0.92 0.63 0.29
Note:
α = ordinal Alpha; ω = Omega. All estimates rounded to two decimal places.

Bifactor model and invariance

# SOM 3 - Figure 1. The logic of the bifactor models for AMAS with associated path coefficients
# (static diagram prepared outside this script)
knitr::include_graphics('pics/AMAS_bifactor_model.png')
SOM 3 - Figure 1. The logic of the bifactor models for AMAS with associated path coefficients
SOM 3 - Figure 1. The logic of the bifactor models for AMAS with associated path coefficients
# Table 3. Results for the bifactor models
# Fit statistics
# Scaled (robust) indices are reported because the models were
# estimated with WLSMV.
model_list %>%
  lapply(
    fitMeasures,
    c(
      'chisq.scaled', 'df.scaled', 
      'pvalue.scaled', 'cfi.scaled', 
      'tli.scaled', 'rmsea.scaled', 
      'rmsea.ci.lower.scaled', 
      'rmsea.ci.upper.scaled', 'agfi'
    )
  ) %>%
  do.call('rbind', .) %>%
  as.data.frame %>%
  transmute(
    'χ2' = chisq.scaled,
    df = df.scaled,
    p = format_p(pvalue.scaled),
    CFI = cfi.scaled,
    TLI = tli.scaled,
    # RMSEA point estimate with its 95% CI, leading zeros stripped
    'RMSEA (95% CI)' = glue(
      '{par} ({cil}-{ciu})',
      par = format_cor(
        rmsea.scaled, 
        width = 3
      ),
      cil = format_cor(
        rmsea.ci.lower.scaled, 
        width = 3
      ),
      ciu = format_cor(
        rmsea.ci.upper.scaled, 
        width = 3
      )
    ),
    AGFI = agfi
  ) %>%
  kable2(
    align = 'c',
    digits = c(2, 0, 3, 2, 2, 0, 2),
    caption = 'Table 3. Results for the bifactor models'
  ) %>%
  kable_styling %>%
  footnote(
    c(
      'χ2 = Chi-square; df = degrees of freedom; p = probability value; CFI = Comparative Fit Index; TLI = Tucker-Lewis Index; RMSEA = Root Mean Square Error of Approximation; CI = Confidence Intervals; AGFI = Adjusted Goodness of Fit Index. See SOM 3 for the path coefficients and error terms.'
    )
  )
Table 3. Results for the bifactor models
χ2 df p CFI TLI RMSEA (95% CI) AGFI
UK 104.80 18 < .001 0.99 0.98 .07 (.06-.08) 1.00
Russia 110.06 18 < .001 0.99 0.98 .08 (.06-.09) 1.00
China 118.12 18 < .001 0.99 0.98 .08 (.06-.09) 0.99
Note:
χ2 = Chi-square; df = degrees of freedom; p = probability value; CFI = Comparative Fit Index; TLI = Tucker-Lewis Index; RMSEA = Root Mean Square Error of Approximation; CI = Confidence Intervals; AGFI = Adjusted Goodness of Fit Index. See SOM 3 for the path coefficients and error terms.
# Table 4. Measurement invariance across countries
# Compare the nested configural / metric / scalar models: scaled fit
# indices per model plus chi-square difference tests between adjacent
# models (taken from the compareFit object's @fit and @nested slots).
compareFit(
  Configural = config_model,
  Metric = metric_model,
  Scalar = scalar_model
) %>%
  (function(x) {
    cbind(x@fit[, c('chisq.scaled', 'df.scaled', 'pvalue.scaled',
                    'cfi.scaled', 'tli.scaled',
                    'rmsea.scaled')],
          x@nested[, c('Chisq diff', 'Df diff', 'Pr(>Chisq)')])
  }) %>%
  mutate(
    pvalue.scaled = format_p(pvalue.scaled),
    'Pr(>Chisq)' = format_p(`Pr(>Chisq)`)
  ) %>%
  setNames(
    c(
      'χ2*', 'df', 'p*',
      'CFI*', 'TLI*', 'RMSEA*', 
      'Δχ2**', 'df',
      'p'
    )
  ) %>%
  kable2(
    align = 'c', 
    digits = 2,
    caption = 'Table 4. Measurement invariance across countries'
  ) %>%
  kable_styling %>%
  footnote(
    'χ2 = Chi-square; df = degrees of freedom; p = p-value; CFI = Comparative Fit Index; TLI = Tucker-Lewis Index; RMSEA = Root Mean Square Error of Approximation; Δ χ2 = Chi-square difference; * Scaled chi-square was used for these statistics; ** Two unscaled chi-squared were used to compute the difference with Satorra (2000) adjustment. All estimates rounded to two decimal places.'
  )
Table 4. Measurement invariance across countries
χ2* df p* CFI* TLI* RMSEA* Δχ2** df p
Configural 331.75 54 < .001 0.99 0.98 0.07
Metric 500.99 84 < .001 0.99 0.98 0.07 136.39 30 < .001
Scalar 1541.21 122 < .001 0.95 0.96 0.11 734.24 38 < .001
Note:
χ2 = Chi-square; df = degrees of freedom; p = p-value; CFI = Comparative Fit Index; TLI = Tucker-Lewis Index; RMSEA = Root Mean Square Error of Approximation; Δ χ2 = Chi-square difference; * Scaled chi-square was used for these statistics; ** Two unscaled chi-squared were used to compute the difference with Satorra (2000) adjustment. All estimates rounded to two decimal places.
# SOM 3 - Table 1. Path coefficients and standard errors
# Standardized loadings with 95% CIs, one column per country. Row
# labels are built with glue using < > delimiters because { } is
# already meaningful inside the LaTeX subscript.
lambda_table <- model_list %>%
  lapply(
    function(m) {
      m %>%
        standardizedSolution %>%
        filter(
          op == '=~'
        ) %>%
        arrange(
          lhs, rhs
        ) %>%
        mutate(
          est = format_cor(est.std, width = 3),
          cil = format_cor(ci.lower, width = 3),
          ciu = format_cor(ci.upper, width = 3)
        ) %>%
        with(
          glue('{est} ({cil}, {ciu})')
        )
    }
  ) %>%
  data.frame(
    row.names = glue(
      '$λ_{<1:18>}$', 
      .open = '<', 
      .close = '>'
    )
  )

# Standard errors of the standardized loadings, one column per country,
# ordered the same way as lambda_table (by factor, then item).
se_table <- model_list %>%
  lapply(
    function(m) {
      loadings <- standardizedSolution(m) %>%
        filter(op == '=~') %>%
        arrange(lhs, rhs)
      format_cor(loadings$se, width = 3)
    }
  ) %>%
  data.frame(
    row.names = glue('e{1:18}')
  )

# Render SOM 3 - Table 1: loadings (with CIs) above their standard errors
kable_styling(
  kable2(
    list(
      'Standardised coefficients (95% CI)' = lambda_table,
      'Error terms' = se_table
    ),
    align_first = 'l',
    align = 'c',
    caption = 'SOM 3 - Table 1. Path coefficients and standard errors'
  )
) %>%
  footnote('CI = Confidence Intervals; $λ_1$ – $λ_{18}$ = standardized coefficients; e1 – e18 = error terms. All estimates rounded to two decimal places')
SOM 3 - Table 1. Path coefficients and standard errors
UK Russia China
Standardised coefficients (95% CI)
\(λ_{1}\) -.11 (-.28, .07) .25 (.11, .39) .07 (-.07, .22)
\(λ_{2}\) .07 (-.06, .20) .43 (.31, .55) .23 (.06, .40)
\(λ_{3}\) .39 (.20, .57) .72 (.64, .79) .39 (.20, .58)
\(λ_{4}\) .32 (.15, .48) .70 (.62, .78) .65 (.51, .79)
\(λ_{5}\) .10 (-.02, .23) .55 (.47, .64) .31 (.15, .46)
\(λ_{6}\) .59 (.53, .65) .51 (.39, .63) .55 (.46, .64)
\(λ_{7}\) .66 (.60, .72) .67 (.55, .80) .60 (.51, .69)
\(λ_{8}\) .22 (.15, .29) .21 (.03, .39) .18 (.01, .36)
\(λ_{9}\) .38 (.32, .44) .41 (.28, .53) .56 (.47, .64)
\(λ_{10}\) .71 (.66, .77) .56 (.47, .65) .54 (.47, .61)
\(λ_{11}\) .63 (.58, .68) .65 (.56, .73) .66 (.59, .74)
\(λ_{12}\) .85 (.81, .88) .72 (.64, .81) .69 (.61, .76)
\(λ_{13}\) .63 (.58, .68) .62 (.51, .73) .67 (.59, .75)
\(λ_{14}\) .72 (.68, .77) .87 (.79, .96) .83 (.74, .92)
\(λ_{15}\) .83 (.78, .88) .60 (.51, .69) .77 (.68, .86)
\(λ_{16}\) .76 (.71, .80) .60 (.52, .69) .58 (.49, .67)
\(λ_{17}\) .63 (.59, .68) .73 (.66, .80) .62 (.54, .70)
\(λ_{18}\) .75 (.71, .80) .54 (.46, .63) .60 (.51, .68)
Error terms
e1 .09 .07 .07
e2 .07 .06 .09
e3 .10 .04 .10
e4 .08 .04 .07
e5 .06 .04 .08
e6 .03 .06 .05
e7 .03 .06 .05
e8 .03 .09 .09
e9 .03 .06 .05
e10 .03 .05 .04
e11 .02 .04 .04
e12 .02 .04 .04
e13 .02 .06 .04
e14 .02 .04 .05
e15 .03 .04 .05
e16 .03 .04 .04
e17 .02 .04 .04
e18 .02 .04 .04
Note:
CI = Confidence Intervals; \(λ_1\)\(λ_{18}\) = standardized coefficients; e1 – e18 = error terms. All estimates rounded to two decimal places

Exploratory factor analysis

UK

# Scree plot of eigenvalues from the UK polychoric correlation matrix
data %>%
  filter(Country == 'UK') %>%
  select(starts_with('AMAS')) %>%
  polychoric(correct = 0) %>%
  { .$rho } %>%
  scree(factors = FALSE)

# Three-factor EFA (principal axis, varimax rotation) on polychoric
# correlations; hints show each item's scale and wording
data %>%
  filter(Country == 'UK') %>%
  select(starts_with('AMAS')) %>%
  fa(
    nfactors = 3, 
    rotate = 'varimax',
    fm = 'pa',
    cor = 'poly'
  ) %>%
  loadings2table(
    hints =
      setNames(
        glue(
          '{sc} - {ic}',
          sc = AMAS_items$scale,
          ic = AMAS_items$item_content),
        AMAS_items$item
      )
  )

Click on a row to get a hint with item’s content. Click again to close the hint. Double click anywhere to hide all hints.

Russia

# Scree plot of eigenvalues from the Russia polychoric correlation matrix
data %>%
  filter(Country == 'Russia') %>%
  select(starts_with('AMAS')) %>%
  polychoric(correct = 0) %>%
  { .$rho } %>%
  scree(factors = FALSE)

# Three-factor EFA (principal axis, varimax rotation) on polychoric
# correlations; hints show each item's scale and wording
data %>%
  filter(Country == 'Russia') %>%
  select(starts_with('AMAS')) %>%
  fa(
    nfactors = 3, 
    rotate = 'varimax',
    fm = 'pa',
    cor = 'poly'
  ) %>%
  loadings2table(
    hints =
      setNames(
        glue(
          '{sc} - {ic}',
          sc = AMAS_items$scale,
          ic = AMAS_items$item_content),
        AMAS_items$item
      )
  )

China

# Scree plot of eigenvalues from the China polychoric correlation matrix
data %>%
  filter(Country == 'China') %>%
  select(starts_with('AMAS')) %>%
  polychoric(correct = 0) %>%
  { .$rho } %>%
  scree(factors = FALSE)

# Three-factor EFA (principal axis, varimax rotation) on polychoric
# correlations; hints show each item's scale and wording
data %>%
  filter(Country == 'China') %>%
  select(starts_with('AMAS')) %>%
  fa(
    nfactors = 3, 
    rotate = 'varimax',
    fm = 'pa',
    cor = 'poly'
  ) %>%
  loadings2table(
    hints =
      setNames(
        glue(
          '{sc} - {ic}',
          sc = AMAS_items$scale,
          ic = AMAS_items$item_content),
        AMAS_items$item
      )
  )